This file is used to combine three datasets:
We load each individual sample, remove melanocytes, and merge the remaining cells.
library(dplyr)
library(patchwork)
library(ggplot2)
library(ComplexHeatmap)
library(org.Hs.eg.db) # not in the Singularity container :(
.libPaths()
## [1] "/home/nf1/R/x86_64-pc-linux-gnu-library/3.6"
## [2] "/usr/local/lib/R/library"
In this section, we set the global settings of the analysis. We will store data there :
save_name = "data3"
out_dir = "."
n_threads = 5 # for tSNE
We combine the three sample information :
sample_info_1 = readRDS(paste0(out_dir, "/../1_metadata/hs_hd_sample_info.rds"))
sample_info_2 = readRDS(paste0(out_dir, "/../5_wu/1_metadata/wu_sample_info.rds"))
sample_info_3 = readRDS(paste0(out_dir, "/../6_takahashi/1_metadata/takahashi_sample_info.rds"))
column_to_keep = c("project_name", "sample_type", "sample_identifier", "color")
sample_info = rbind.data.frame(sample_info_1[, column_to_keep],
sample_info_2[, column_to_keep],
sample_info_3[, column_to_keep],
stringsAsFactors = FALSE)
graphics::pie(rep(1, nrow(sample_info)),
col = sample_info$color,
labels = sample_info$project_name)
Here are custom colors for each cell type :
color_markers = readRDS(paste0(out_dir, "/../1_metadata/hs_hd_color_markers.rds"))
data.frame(cell_type = names(color_markers),
color = unlist(color_markers)) %>%
ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
ggplot2::geom_point(pch = 21, size = 5) +
ggplot2::scale_fill_manual(values = unlist(color_markers), breaks = names(color_markers)) +
ggplot2::theme_classic() +
ggplot2::theme(legend.position = "none",
axis.line = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank())
We load the markers and specific colors for each cell type :
cell_markers = readRDS(paste0(out_dir, "/../1_metadata/hs_hd_cell_markers.rds"))
lengths(cell_markers)
## CD4 T cells CD8 T cells Langerhans cells macrophages
## 13 13 9 10
## B cells cuticle cortex medulla
## 16 15 16 10
## IRS proliferative IBL ORS
## 16 20 15 16
## IFE HFSC melanocytes sebocytes
## 17 17 10 8
We load markers to display on the dotplot :
dotplot_markers = readRDS(paste0(out_dir, "/../1_metadata/hs_hd_dotplot_markers.rds"))
dotplot_markers
## $`CD4 T cells`
## [1] "CD3E" "CD4"
##
## $`CD8 T cells`
## [1] "CD3E" "CD8A"
##
## $`Langerhans cells`
## [1] "CD207" "CPVL"
##
## $macrophages
## [1] "TREM2" "MSR1"
##
## $`B cells`
## [1] "CD79A" "CD79B"
##
## $cuticle
## [1] "KRT32" "KRT35"
##
## $cortex
## [1] "KRT31" "PRR9"
##
## $medulla
## [1] "BAMBI" "ADLH1A3"
##
## $IRS
## [1] "KRT71" "KRT73"
##
## $proliferative
## [1] "TOP2A" "MCM5"
##
## $IBL
## [1] "KRT16" "KRT6C"
##
## $ORS
## [1] "KRT15" "GPX2"
##
## $IFE
## [1] "SPINK5" "LY6D"
##
## $HFSC
## [1] "DIO2" "TCEAL2"
##
## $melanocytes
## [1] "DCT" "MLANA"
##
## $sebocytes
## [1] "CLMP" "PPARG"
For each sample, we :
We load individual datasets :
sobj_list = list()
# Our data
project_names_oi = sample_info_1$project_name
sobj_list[["here"]] = lapply(project_names_oi, FUN = function(one_project_name) {
subsobj = readRDS(paste0(out_dir, "/../2_individual/datasets/",
one_project_name, "_sobj_filtered.rds"))
return(subsobj)
})
names(sobj_list[["here"]]) = project_names_oi
# Wu data
project_names_oi = sample_info_2$project_name
sobj_list[["wu"]] = lapply(project_names_oi, FUN = function(one_project_name) {
subsobj = readRDS(paste0(out_dir, "/../5_wu/2_individual/datasets/",
one_project_name, "_sobj_filtered.rds"))
return(subsobj)
})
names(sobj_list[["wu"]]) = project_names_oi
# Takahashi data
project_names_oi = sample_info_3$project_name
sobj_list[["takahashi"]] = lapply(project_names_oi, FUN = function(one_project_name) {
subsobj = readRDS(paste0(out_dir, "/../6_takahashi/2_individual/datasets/",
one_project_name, "_sobj_filtered.rds"))
return(subsobj)
})
names(sobj_list[["takahashi"]]) = project_names_oi
# Unlist
sobj_list = unlist(sobj_list, recursive = FALSE)
lapply(sobj_list, FUN = dim) %>%
do.call(rbind, .) %>%
rbind(., colSums(.))
## [,1] [,2]
## here.2021_31 27955 1043
## here.2021_36 27955 602
## here.2021_41 27955 2256
## here.2022_03 27955 3977
## here.2022_14 27955 2588
## here.2022_01 27955 1213
## here.2022_02 27955 2286
## wu.F18 27955 1372
## wu.F31B 27955 4786
## wu.F31W 27955 3520
## wu.F59 27955 2445
## wu.F62B 27955 3279
## wu.F62W 27955 2360
## takahashi.GSM3717034 12060 2421
## takahashi.GSM3717035 14154 2832
## takahashi.GSM3717036 17354 2656
## takahashi.GSM3717037 32738 5768
## takahashi.GSM3717038 32738 5874
## 472459 51278
We represent cells in the tSNE :
name2D = "RNA_pca_20_tsne"
We look at cell type annotation for each dataset :
plot_list = lapply(sobj_list, FUN = function(one_sobj) {
mytitle = as.character(unique(one_sobj$project_name))
mysubtitle = ncol(one_sobj)
p = Seurat::DimPlot(one_sobj, group.by = "cell_type",
reduction = name2D) +
ggplot2::scale_color_manual(values = color_markers,
breaks = names(color_markers),
name = "Cell Type") +
ggplot2::labs(title = mytitle,
subtitle = paste0(mysubtitle, " cells")) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)) +
Seurat::NoAxes()
return(p)
})
plot_list[[length(plot_list) + 1]] = patchwork::guide_area()
patchwork::wrap_plots(plot_list, ncol = 4) +
patchwork::plot_layout(guides = "collect") &
ggplot2::theme(legend.position = "right")
and clustering :
plot_list = lapply(sobj_list, FUN = function(one_sobj) {
mytitle = as.character(unique(one_sobj$project_name))
mysubtitle = ncol(one_sobj)
p = Seurat::DimPlot(one_sobj, group.by = "seurat_clusters",
reduction = name2D, label = TRUE) +
ggplot2::labs(title = mytitle,
subtitle = paste0(mysubtitle, " cells")) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)) +
Seurat::NoAxes() + Seurat::NoLegend()
return(p)
})
patchwork::wrap_plots(plot_list, ncol = 4)
For each individual dataset, we remove melanocytes. First, we smooth cell type annotation at a cluster level :
sobj_list = lapply(sobj_list, FUN = function(one_sobj) {
cluster_type = table(one_sobj$cell_type, one_sobj$seurat_clusters) %>%
prop.table(., margin = 2) %>%
apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
levels(one_sobj$cell_type)[cluster_type])
one_sobj$cluster_type = cluster_type[one_sobj$seurat_clusters]
## Output
return(one_sobj)
})
To locate melanocytes, we look at their score, cell type annotation, and clustering.
plot_list = lapply(sobj_list, FUN = function(one_sobj) {
project_name = as.character(unique(one_sobj$project_name))
plot_sublist = list()
# Score
plot_sublist[[1]] = Seurat::FeaturePlot(one_sobj, reduction = name2D,
features = "score_melanocytes") +
ggplot2::labs(title = project_name,
subtitle = "Melanocytes score") +
Seurat::NoAxes() +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(aspect.ratio = 1,
plot.subtitle = element_text(hjust = 0.5))
# Cell type
plot_sublist[[2]] = Seurat::DimPlot(one_sobj,
reduction = name2D,
group.by = "cell_type",
order = "melanocytes") +
ggplot2::scale_color_manual(values = c("purple", rep("gray92", length(color_markers) - 1)),
breaks = c("melanocytes", setdiff(names(color_markers), "melanocytes"))) +
ggplot2::labs(title = "Cell type annotation",
subtitle = paste0(sum(one_sobj$cell_type == "melanocytes"),
" melanocytes")) +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
# Clusters
plot_sublist[[3]] = Seurat::DimPlot(one_sobj,
reduction = name2D,
group.by = "seurat_clusters",
label = TRUE) +
ggplot2::labs(title = "Clusters") +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
# Cluster type
plot_sublist[[4]] = Seurat::DimPlot(one_sobj,
reduction = name2D,
group.by = "cluster_type") +
ggplot2::scale_color_manual(values = c("purple", rep("gray92", length(color_markers) - 1)),
breaks = c("melanocytes", setdiff(names(color_markers), "melanocytes"))) +
ggplot2::labs(title = "Cluster annotation",
subtitle = paste0(sum(one_sobj$cluster_type == "melanocytes"),
" melanocytes")) +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
return(plot_sublist)
}) %>% unlist(., recursive = FALSE)
patchwork::wrap_plots(plot_list, ncol = 4)
We remove melanocytes based on cluster annotation :
sobj_list = lapply(sobj_list, FUN = function(one_sobj) {
one_sobj$is_of_interest = (one_sobj$cluster_type != "melanocytes")
if (sum(one_sobj$is_of_interest) > 0) {
one_sobj = subset(one_sobj, is_of_interest == TRUE)
} else {
one_sobj = NA
}
one_sobj$is_of_interest = NULL
return(one_sobj)
})
lapply(sobj_list, FUN = dim) %>%
do.call(rbind, .) %>%
rbind(., colSums(.))
## [,1] [,2]
## here.2021_31 27955 885
## here.2021_36 27955 528
## here.2021_41 27955 1982
## here.2022_03 27955 3457
## here.2022_14 27955 2422
## here.2022_01 27955 835
## here.2022_02 27955 2002
## wu.F18 27955 1372
## wu.F31B 27955 4624
## wu.F31W 27955 3520
## wu.F59 27955 2445
## wu.F62B 27955 3221
## wu.F62W 27955 2360
## takahashi.GSM3717034 12060 2278
## takahashi.GSM3717035 14154 2832
## takahashi.GSM3717036 17354 2527
## takahashi.GSM3717037 32738 5465
## takahashi.GSM3717038 32738 5667
## 472459 48422
We remove melanocytes from annotation :
cell_markers = cell_markers[names(cell_markers) != "melanocytes"]
color_markers = color_markers[names(color_markers) != "melanocytes"]
dotplot_markers = dotplot_markers[names(dotplot_markers) != "melanocytes"]
We re-annote cells for cell type, since melanocytes have been removed :
sobj_list = lapply(sobj_list, FUN = function(one_sobj) {
# Remove old annotation
one_sobj@meta.data[, grep(colnames(one_sobj@meta.data), pattern = "score", value = TRUE)] = NULL
# Re-annot
one_sobj = aquarius::cell_annot_custom(one_sobj,
newname = "cell_type",
markers = cell_markers,
use_negative = TRUE,
add_score = FALSE,
verbose = TRUE)
# Set factor levels
one_sobj$cell_type = factor(one_sobj$cell_type, levels = names(cell_markers))
return(one_sobj)
})
To which extent gene names are common between all datasets ?
gene_names = lapply(sobj_list, FUN = rownames)
obj_upset = ComplexHeatmap::make_comb_mat(gene_names, mode = "distinct")
ComplexHeatmap::UpSet(obj_upset)
For our data and Wu data, we have the correspondence between gene
names and Ensembl IDs. For Takahashi data, we try to find them using
bitr function from clusterProfiler
package.
annotation = clusterProfiler::bitr(
geneID = unique(unlist(gene_names)),
fromType = "SYMBOL",
toType = "ENSEMBL",
OrgDb = org.Hs.eg.db,
drop = FALSE)
head(annotation)
## SYMBOL ENSEMBL
## 1 MIR1302-2HG <NA>
## 2 FAM138A ENSG00000237613
## 3 OR4F5 ENSG00000186092
## 4 AL627309.1 <NA>
## 5 AL627309.3 <NA>
## 6 AL627309.4 <NA>
(Time to run : 0.53 s)
There is a lot of NA. Given that our data and Wu data were mapped using the same annotation, we complete the annotation table using the Ensembl ID we stored in each individual Seurat objects:
annotation = dplyr::left_join(
x = annotation,
y = sobj_list[["here.2021_31"]]@assays[["RNA"]]@meta.features[, c("Ensembl_ID", "gene_name")],
by = c("SYMBOL" = "gene_name"))
head(annotation)
## SYMBOL ENSEMBL Ensembl_ID
## 1 MIR1302-2HG <NA> ENSG00000243485
## 2 FAM138A ENSG00000237613 ENSG00000237613
## 3 OR4F5 ENSG00000186092 ENSG00000186092
## 4 AL627309.1 <NA> ENSG00000238009
## 5 AL627309.3 <NA> ENSG00000239945
## 6 AL627309.4 <NA> ENSG00000241599
There is still a lot of data. We try to complete the dataframe using
the biomaRt package:
httr::set_config(httr::config(ssl_verifypeer = FALSE))
mart = biomaRt::useEnsembl(biomart = "ensembl",
dataset = "hsapiens_gene_ensembl")
corresp = biomaRt::getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"),
filters = "hgnc_symbol",
values = unique(unlist(gene_names)),
mart = mart)
head(corresp)
## ensembl_gene_id hgnc_symbol
## 1 ENSG00000184389 A3GALT2
## 2 ENSG00000188984 AADACL3
## 3 ENSG00000204518 AADACL4
## 4 ENSG00000131584 ACAP3
## 5 ENSG00000162390 ACOT11
## 6 ENSG00000097021 ACOT7
(Time to run : 21.85 s)
We merge this information with the annotation table:
annotation = dplyr::left_join(
x = annotation,
y = corresp,
by = c("SYMBOL" = "hgnc_symbol"))
head(annotation)
## SYMBOL ENSEMBL Ensembl_ID ensembl_gene_id
## 1 MIR1302-2HG <NA> ENSG00000243485 ENSG00000243485
## 2 FAM138A ENSG00000237613 ENSG00000237613 ENSG00000237613
## 3 OR4F5 ENSG00000186092 ENSG00000186092 ENSG00000186092
## 4 AL627309.1 <NA> ENSG00000238009 <NA>
## 5 AL627309.3 <NA> ENSG00000239945 <NA>
## 6 AL627309.4 <NA> ENSG00000241599 <NA>
We merge the two Ensembl columns:
annotation = annotation %>%
dplyr::mutate(ENSEMBL_ID = ifelse(
!is.na(ENSEMBL),
yes = as.character(ENSEMBL),
no = ifelse(!is.na(ensembl_gene_id),
yes = as.character(ensembl_gene_id),
no = as.character(Ensembl_ID)))) %>%
dplyr::select(SYMBOL, ENSEMBL_ID) %>%
unique()
head(annotation)
## SYMBOL ENSEMBL_ID
## 1 MIR1302-2HG ENSG00000243485
## 2 FAM138A ENSG00000237613
## 3 OR4F5 ENSG00000186092
## 4 AL627309.1 ENSG00000238009
## 5 AL627309.3 ENSG00000239945
## 6 AL627309.4 ENSG00000241599
How many NA data are remaining ?
table(is.na(annotation$ENSEMBL_ID))
##
## FALSE TRUE
## 34854 14597
For which dataset Ensembl IDs are not available ?
genes_without_ensembl = annotation %>%
dplyr::filter(is.na(ENSEMBL_ID)) %>%
dplyr::pull(SYMBOL)
lapply(gene_names, FUN = function(sample_genes) {
return(table(sample_genes %in% genes_without_ensembl))
}) %>% do.call(rbind, .) %>%
`colnames<-`(c("ID available", "ID not available"))
## ID available ID not available
## here.2021_31 27935 20
## here.2021_36 27935 20
## here.2021_41 27935 20
## here.2022_03 27935 20
## here.2022_14 27935 20
## here.2022_01 27935 20
## here.2022_02 27935 20
## wu.F18 27935 20
## wu.F31B 27935 20
## wu.F31W 27935 20
## wu.F59 27935 20
## wu.F62B 27935 20
## wu.F62W 27935 20
## takahashi.GSM3717034 10976 1084
## takahashi.GSM3717035 12996 1158
## takahashi.GSM3717036 15384 1970
## takahashi.GSM3717037 20530 12208
## takahashi.GSM3717038 20530 12208
The fact that 20 ID are not available in our data and Wu data is due to genes sharing the same Ensembl ID… We will still keep all the genes available in these two sets of data. The goal here is to find a correspondence between gene names from Takahashi data and from our and Wu data, passing by Ensembl IDs.
Some genes share the same symbol but not the Ensembl ID. Since in
Takahashi data, there are only symbols, we can not know the Ensembl ID
to which they correspond. We make a choice to select the first
Ensembl ID available for the symbol. For this, we select only
the unique symbol in the annotation table.
table(duplicated(annotation$SYMBOL))
##
## FALSE TRUE
## 46021 3430
dim(annotation)
## [1] 49451 2
annotation = annotation %>%
dplyr::filter(!duplicated(SYMBOL))
dim(annotation)
## [1] 46021 2
Now, we can set symbol as row names:
rownames(annotation) = annotation$SYMBOL
head(annotation)
## SYMBOL ENSEMBL_ID
## MIR1302-2HG MIR1302-2HG ENSG00000243485
## FAM138A FAM138A ENSG00000237613
## OR4F5 OR4F5 ENSG00000186092
## AL627309.1 AL627309.1 ENSG00000238009
## AL627309.3 AL627309.3 ENSG00000239945
## AL627309.4 AL627309.4 ENSG00000241599
For each dataset, we extract the count matrices and metadata. The
gene names in the count matrices will be convert to Ensembl ID, using
our correspondence of Wu and our data, or the annotation
table for Takahashi data.
our_correspondence = sobj_list[["here.2021_31"]]@assays[["RNA"]]@meta.features[, c("Ensembl_ID", "gene_name")]
our_correspondence$no_dup_gene_names = rownames(our_correspondence)
rownames(our_correspondence) = our_correspondence$Ensembl_ID
count_matrix_list = lapply(sobj_list, FUN = function(one_sobj) {
project_name = unique(one_sobj$project_name)
count_matrix = one_sobj@assays[["RNA"]]@counts
if (project_name %in% sample_info_3$project_name) {
# get Ensembl ID corresponding to gene names
this_dataset_annot = annotation[rownames(count_matrix), ]
# get gene names corresponding to Ensembl ID, using our correspondence (same dim)
this_dataset_annot = dplyr::left_join(x = this_dataset_annot,
y = our_correspondence,
by = c("ENSEMBL_ID" = "Ensembl_ID"))
# re set old gene names if none are available using our correspondence
this_dataset_annot = this_dataset_annot %>%
dplyr::mutate(no_dup_gene_names = ifelse(is.na(no_dup_gene_names),
yes = SYMBOL,
no = no_dup_gene_names))
rownames(this_dataset_annot) = this_dataset_annot$SYMBOL # all in the count matrices
# convert the row names of the count matrix
rownames(count_matrix) = this_dataset_annot[rownames(count_matrix), "no_dup_gene_names"]
}
return(count_matrix)
})
lapply(count_matrix_list, FUN = dim) %>%
do.call(rbind, .) %>%
`colnames<-`(c("Nb genes", "Nb cells"))
## Nb genes Nb cells
## here.2021_31 27955 885
## here.2021_36 27955 528
## here.2021_41 27955 1982
## here.2022_03 27955 3457
## here.2022_14 27955 2422
## here.2022_01 27955 835
## here.2022_02 27955 2002
## wu.F18 27955 1372
## wu.F31B 27955 4624
## wu.F31W 27955 3520
## wu.F59 27955 2445
## wu.F62B 27955 3221
## wu.F62W 27955 2360
## takahashi.GSM3717034 12060 2278
## takahashi.GSM3717035 14154 2832
## takahashi.GSM3717036 17354 2527
## takahashi.GSM3717037 32738 5465
## takahashi.GSM3717038 32738 5667
What is the aspect of the new upset plot ?
gene_names = lapply(count_matrix_list, FUN = rownames)
obj_upset = ComplexHeatmap::make_comb_mat(gene_names, mode = "distinct")
ComplexHeatmap::UpSet(obj_upset)
It almost did not change anything… Indeed:
table(rownames(sobj_list$takahashi.GSM3717037) %in% rownames(count_matrix_list$takahashi.GSM3717037))
##
## FALSE TRUE
## 52 32686
So for the final dataset, we will extract only the common genes between all datasets. It corresponds to the first column in tne upset plot.
common_genes = Reduce(x = lapply(count_matrix_list, FUN = rownames),
f = intersect)
length(common_genes)
## [1] 7785
We subset all datasets to keep only these genes :
count_matrix_list = lapply(count_matrix_list, FUN = function(one_count_matrix) {
one_count_matrix = one_count_matrix[common_genes, ]
return(one_count_matrix)
})
lapply(count_matrix_list, FUN = dim) %>%
do.call(rbind, .) %>%
`colnames<-`(c("Nb genes", "Nb cells"))
## Nb genes Nb cells
## here.2021_31 7785 885
## here.2021_36 7785 528
## here.2021_41 7785 1982
## here.2022_03 7785 3457
## here.2022_14 7785 2422
## here.2022_01 7785 835
## here.2022_02 7785 2002
## wu.F18 7785 1372
## wu.F31B 7785 4624
## wu.F31W 7785 3520
## wu.F59 7785 2445
## wu.F62B 7785 3221
## wu.F62W 7785 2360
## takahashi.GSM3717034 7785 2278
## takahashi.GSM3717035 7785 2832
## takahashi.GSM3717036 7785 2527
## takahashi.GSM3717037 7785 5465
## takahashi.GSM3717038 7785 5667
To merge all the count matrices, we add a prefix to each cell to avoid duplicated cell barcodes. The prefix is:
names(count_matrix_list) = names(count_matrix_list) %>%
stringr::str_split(string = .,
pattern = "\\.") %>%
lapply(., `[[`, 2) %>%
unlist()
names(sobj_list) = names(count_matrix_list)
names(count_matrix_list)
## [1] "2021_31" "2021_36" "2021_41" "2022_03" "2022_14"
## [6] "2022_01" "2022_02" "F18" "F31B" "F31W"
## [11] "F59" "F62B" "F62W" "GSM3717034" "GSM3717035"
## [16] "GSM3717036" "GSM3717037" "GSM3717038"
We add the prefix for each count matrix and merge them:
count_matrix_merge = lapply(names(count_matrix_list), FUN = function(one_project_name) {
one_count_matrix = count_matrix_list[[one_project_name]]
colnames(one_count_matrix) = paste0(one_project_name, "_", colnames(one_count_matrix))
return(one_count_matrix)
}) %>% do.call(cbind.data.frame, .)
dim(count_matrix_merge)
## [1] 7785 48422
count_matrix_merge[c(1:3), c(1:3)]
## 2021_31_AAACGAAGTTGGCCTG-1 2021_31_AAAGGATCATTAAAGG-1
## NOC2L 0 3
## TNFRSF18 2 0
## SDF4 1 3
## 2021_31_AAAGGGCGTATCAGGG-1
## NOC2L 0
## TNFRSF18 2
## SDF4 3
Similarly, we build a big metadata table:
columns_of_interest = c("orig.ident", "nCount_RNA", "nFeature_RNA",
"log_nCount_RNA", "project_name", "sample_identifier",
"sample_type", "Seurat.Phase", "cyclone.Phase",
"percent.mt", "percent.rb", "cell_type" )
metadata_merge = lapply(names(sobj_list), FUN = function(one_project_name) {
one_sobj = sobj_list[[one_project_name]]
metadata = one_sobj@meta.data
rownames(metadata) = paste0(one_project_name, "_", rownames(metadata))
metadata = metadata[, columns_of_interest]
return(metadata)
}) %>% do.call(rbind.data.frame, .)
dim(metadata_merge)
## [1] 48422 12
head(metadata_merge)
## orig.ident nCount_RNA nFeature_RNA log_nCount_RNA
## 2021_31_AAACGAAGTTGGCCTG-1 2021_31 6216 1872 8.734882
## 2021_31_AAAGGATCATTAAAGG-1 2021_31 44657 6174 10.706766
## 2021_31_AAAGGGCGTATCAGGG-1 2021_31 27344 5173 10.216252
## 2021_31_AAAGGTACAAATCGTC-1 2021_31 5776 1856 8.661467
## 2021_31_AAAGTCCCATACCAGT-1 2021_31 25356 4065 10.140771
## 2021_31_AAAGTCCGTGCCTACG-1 2021_31 8505 2346 9.048410
## project_name sample_identifier sample_type
## 2021_31_AAACGAAGTTGGCCTG-1 2021_31 HS_1 HS
## 2021_31_AAAGGATCATTAAAGG-1 2021_31 HS_1 HS
## 2021_31_AAAGGGCGTATCAGGG-1 2021_31 HS_1 HS
## 2021_31_AAAGGTACAAATCGTC-1 2021_31 HS_1 HS
## 2021_31_AAAGTCCCATACCAGT-1 2021_31 HS_1 HS
## 2021_31_AAAGTCCGTGCCTACG-1 2021_31 HS_1 HS
## Seurat.Phase cyclone.Phase percent.mt percent.rb
## 2021_31_AAACGAAGTTGGCCTG-1 S G1 2.8314028 25.59524
## 2021_31_AAAGGATCATTAAAGG-1 S G1 7.1769263 25.36221
## 2021_31_AAAGGGCGTATCAGGG-1 G2M G1 4.3812171 20.74312
## 2021_31_AAAGGTACAAATCGTC-1 G1 G1 0.2077562 11.53047
## 2021_31_AAAGTCCCATACCAGT-1 G1 G1 0.1183152 17.64080
## 2021_31_AAAGTCCGTGCCTACG-1 G1 G1 0.1293357 32.82775
## cell_type
## 2021_31_AAACGAAGTTGGCCTG-1 CD4 T cells
## 2021_31_AAAGGATCATTAAAGG-1 proliferative
## 2021_31_AAAGGGCGTATCAGGG-1 ORS
## 2021_31_AAAGGTACAAATCGTC-1 IBL
## 2021_31_AAAGTCCCATACCAGT-1 macrophages
## 2021_31_AAAGTCCGTGCCTACG-1 CD8 T cells
We generate a new Seurat object from the count matrix and the metadata:
dim(count_matrix_merge)
## [1] 7785 48422
dim(metadata_merge)
## [1] 48422 12
all.equal(rownames(metadata_merge), colnames(count_matrix_merge))
## [1] TRUE
sobj = Seurat::CreateSeuratObject(counts = count_matrix_merge,
meta.data = metadata_merge)
sobj
## An object of class Seurat
## 7785 features across 48422 samples within 1 assay
## Active assay: RNA (7785 features, 0 variable features)
We add the correspondence between gene names and gene ID.
sobj@assays$RNA@meta.features = annotation[common_genes, ]
head(sobj@assays$RNA@meta.features)
## SYMBOL ENSEMBL_ID
## NOC2L NOC2L ENSG00000188976
## TNFRSF18 TNFRSF18 ENSG00000186891
## SDF4 SDF4 ENSG00000078808
## UBE2J2 UBE2J2 ENSG00000160087
## AURKAIP1 AURKAIP1 ENSG00000175756
## CCNL2 CCNL2 ENSG00000221978
We remove what we will not use :
rm(sobj_list, annotation, corresp, our_correspondence, obj_upset,
gene_names, mart, plot_list,
sample_info_1, sample_info_2, sample_info_3, column_to_keep,
count_matrix_merge, count_matrix_list, metadata_merge,
column_to_keep, common_genes, columns_of_interest)
We keep a subset of meta.data and reset levels :
sobj$project_name = factor(sobj$project_name, levels = levels(sample_info$project_name))
summary(sobj@meta.data)
## orig.ident nCount_RNA nFeature_RNA log_nCount_RNA
## GSM3717038: 5667 Min. : 20 Min. : 20 Min. : 2.996
## GSM3717037: 5465 1st Qu.: 163 1st Qu.: 127 1st Qu.: 5.094
## F31B : 4624 Median : 4566 Median :1530 Median : 8.426
## F31W : 3520 Mean : 8501 Mean :1798 Mean : 7.636
## 2022_03 : 3457 3rd Qu.: 12954 3rd Qu.:2923 3rd Qu.: 9.469
## F62B : 3221 Max. :139803 Max. :7942 Max. :11.848
## (Other) :22468
## project_name sample_identifier sample_type
## GSM3717038: 5667 Takahashi_HD_5: 5667 HS : 9274
## GSM3717037: 5465 Takahashi_HD_4: 5465 HD : 2837
## F31B : 4624 Wu_HD_2 : 4624 Wu_HD :17542
## F31W : 3520 Wu_HD_3 : 3520 Takahashi_HD:18769
## 2022_03 : 3457 HS_4 : 3457
## F62B : 3221 Wu_HD_5 : 3221
## (Other) :22468 (Other) :22468
## Seurat.Phase cyclone.Phase percent.mt percent.rb
## G1 :30475 Length:48422 Min. : 0.000 Min. : 0.00
## G2M : 7407 Class :character 1st Qu.: 1.756 1st Qu.:17.25
## S :10406 Mode :character Median : 3.881 Median :23.58
## Undecided: 134 Mean : 4.750 Mean :22.53
## 3rd Qu.: 6.452 3rd Qu.:28.48
## Max. :20.000 Max. :50.00
##
## cell_type
## IBL :11134
## IFE : 8379
## ORS : 4816
## HFSC : 4500
## cuticle: 3794
## cortex : 3064
## (Other):12735
We remove genes that are expressed in less than 5 cells :
sobj = aquarius::filter_features(sobj, min_cells = 5)
## [1] 7785 48422
## [1] 7785 48422
sobj
## An object of class Seurat
## 7785 features across 48422 samples within 1 assay
## Active assay: RNA (7785 features, 0 variable features)
We remove cells expressing less than 200 genes.
sobj = subset(sobj, nFeature_RNA > 250)
sobj
## An object of class Seurat
## 7785 features across 35120 samples within 1 assay
## Active assay: RNA (7785 features, 0 variable features)
How many cells by sample ?
table(sobj$project_name)
##
## 2021_31 2021_36 2021_41 2022_03 2022_14 2022_01 2022_02
## 885 528 1982 3457 2422 835 2002
## F18 F31B F31W F59 F62B F62W GSM3717034
## 1372 4624 3520 2445 3221 2360 47
## GSM3717035 GSM3717036 GSM3717037 GSM3717038
## 252 418 3740 1010
We represent this information as a barplot :
aquarius::plot_barplot(df = table(sobj$project_name,
sobj$cell_type) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "Cell Type", "Number")),
x = "Sample", y = "Number", fill = "Cell Type",
position = position_fill()) +
ggplot2::scale_fill_manual(values = unlist(color_markers),
breaks = names(color_markers),
name = "Cell Type")
This is the same barplot with another position :
aquarius::plot_barplot(df = table(sobj$project_name,
sobj$cell_type) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "Cell Type", "Number")),
x = "Sample", y = "Number", fill = "Cell Type",
position = position_stack()) +
ggplot2::scale_fill_manual(values = unlist(color_markers),
breaks = names(color_markers),
name = "Cell Type")
We normalize the count matrix for remaining cells and select highly variable features :
sobj = Seurat::NormalizeData(sobj,
normalization.method = "LogNormalize")
sobj = Seurat::FindVariableFeatures(sobj, nfeatures = 2000)
sobj = Seurat::ScaleData(sobj)
sobj
## An object of class Seurat
## 7785 features across 35120 samples within 1 assay
## Active assay: RNA (7785 features, 2000 variable features)
We perform a PCA :
sobj = Seurat::RunPCA(sobj,
assay = "RNA",
reduction.name = "RNA_pca",
npcs = 100,
seed.use = 1337L)
sobj
## An object of class Seurat
## 7785 features across 35120 samples within 1 assay
## Active assay: RNA (7785 features, 2000 variable features)
## 1 dimensional reduction calculated: RNA_pca
We choose the number of dimensions such that they summarize 60 % of the variability :
stdev = sobj@reductions[["RNA_pca"]]@stdev
stdev_prop = cumsum(stdev)/sum(stdev)
ndims = which(stdev_prop > 0.60)[1]
ndims
## [1] 38
We can visualize this on the elbow plot :
elbow_p = Seurat::ElbowPlot(sobj, ndims = 100, reduction = "RNA_pca") +
ggplot2::geom_point(x = ndims, y = stdev[ndims], col = "red")
x_text = ggplot_build(elbow_p)$layout$panel_params[[1]]$x$get_labels() %>% as.numeric()
elbow_p = elbow_p +
ggplot2::scale_x_continuous(breaks = sort(c(x_text, ndims)), limits = c(0, 100))
x_color = ifelse(ggplot_build(elbow_p)$layout$panel_params[[1]]$x$get_labels() %>%
as.numeric() %>% round(., 2) == round(ndims, 2), "red", "black")
elbow_p = elbow_p +
ggplot2::theme_classic() +
ggplot2::theme(axis.text.x = element_text(color = x_color))
elbow_p
We generate a tSNE and a UMAP with 38 principal components :
sobj = Seurat::RunTSNE(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
num_threads = n_threads, # Rtsne::Rtsne option
reduction.name = paste0("RNA_pca_", ndims, "_tsne"))
sobj = Seurat::RunUMAP(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_umap"))
(Time to run : 139.3 s)
We can visualize the two representations :
tsne = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("RNA_pca_", ndims, "_umap")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We remove sample specific effect on the pca using Harmony :
`%||%` = function(lhs, rhs) {
if (!is.null(x = lhs)) {
return(lhs)
} else {
return(rhs)
}
}
set.seed(1337L)
sobj = harmony::RunHarmony(object = sobj,
group.by.vars = "project_name",
plot_convergence = TRUE,
reduction = "RNA_pca",
assay.use = "RNA",
reduction.save = "harmony",
max.iter.harmony = 50,
project.dim = FALSE)
(Time
to run : 198.79 s)
From this batch-effect removed projection, we generate a tSNE and a UMAP.
sobj = Seurat::RunUMAP(sobj,
seed.use = 1337L,
dims = 1:ndims,
reduction = "harmony",
reduction.name = paste0("harmony_", ndims, "_umap"),
reduction.key = paste0("harmony_", ndims, "umap_"))
sobj = Seurat::RunTSNE(sobj,
dims = 1:ndims,
seed.use = 1337L,
num_threads = n_threads, # Rtsne::Rtsne option
reduction = "harmony",
reduction.name = paste0("harmony_", ndims, "_tsne"),
reduction.key = paste0("harmony", ndims, "tsne_"))
(Time to run : 134.54 s)
We visualize the corrected projections :
tsne = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("harmony_", ndims, "_tsne")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - harmony - tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("harmony_", ndims, "_umap")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - harmony - UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We will keep the tSNE from harmony :
reduction = "harmony"
name2D = paste0("harmony_", ndims, "_tsne")
We generate a clustering :
sobj = Seurat::FindNeighbors(sobj, reduction = reduction, dims = 1:ndims)
sobj = Seurat::FindClusters(sobj, resolution = 1.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 35120
## Number of edges: 1447874
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8757
## Number of communities: 36
## Elapsed time: 7 seconds
clusters_plot = Seurat::DimPlot(sobj, reduction = name2D, label = TRUE) +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::labs(title = "Clusters ID") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
clusters_plot
We represent the 4 quality metrics :
plot_list = Seurat::FeaturePlot(sobj, reduction = name2D,
combine = FALSE, pt.size = 0.25,
features = c("percent.mt", "percent.rb", "log_nCount_RNA", "nFeature_RNA"))
plot_list = lapply(plot_list, FUN = function(one_plot) {
one_plot +
Seurat::NoAxes() +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(aspect.ratio = 1)
})
patchwork::wrap_plots(plot_list, nrow = 1)
We can represent clusters, split by sample of origin :
plot_list = aquarius::plot_split_dimred(sobj,
reduction = name2D,
split_by = "project_name",
group_by = "seurat_clusters",
split_color = setNames(sample_info$color,
nm = sample_info$project_name),
group_color = aquarius::gg_color_hue(length(levels(sobj$seurat_clusters))))
plot_list[[length(plot_list) + 1]] = clusters_plot +
ggplot2::labs(title = "Cluster ID") &
ggplot2::theme(plot.title = element_text(hjust = 0.5, size = 15))
patchwork::wrap_plots(plot_list, ncol = 4) &
Seurat::NoLegend()
We visualize cell type :
plot_list = lapply((c(paste0("RNA_pca_", ndims, "_tsne"),
paste0("RNA_pca_", ndims, "_umap"),
paste0("harmony_", ndims, "_tsne"),
paste0("harmony_", ndims, "_umap"))),
FUN = function(one_red) {
Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = one_red,
cols = color_markers) +
Seurat::NoAxes() + ggplot2::ggtitle(one_red) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
})
patchwork::wrap_plots(plot_list, nrow = 2) +
patchwork::plot_layout(guides = "collect")
We make a representation split by origin to show cell types :
plot_list = aquarius::plot_split_dimred(sobj,
reduction = name2D,
split_by = "project_name",
split_color = setNames(sample_info$color,
nm = sample_info$project_name),
group_by = "cell_type",
group_color = color_markers)
plot_list[[length(plot_list) + 1]] = patchwork::guide_area()
patchwork::wrap_plots(plot_list, ncol = 4) +
patchwork::plot_layout(guides = "collect") &
ggplot2::theme(legend.position = "right")
We visualize cell cycle annotation, and BIRC5 and TOP2A expression levels :
plot_list = list()
# Seurat
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = name2D) +
Seurat::NoAxes() + ggplot2::labs(title = "Seurat annotation") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
# cyclone
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = name2D) +
Seurat::NoAxes() + ggplot2::labs(title = "cyclone annotation") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
# BIRC5
plot_list[[3]] = Seurat::FeaturePlot(sobj, features = "BIRC5",
reduction = name2D) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
# TOP2A
plot_list[[4]] = Seurat::FeaturePlot(sobj, features = "TOP2A",
reduction = name2D) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, ncol = 2)
We save the Seurat object :
saveRDS(sobj, file = paste0(out_dir, "/", save_name, "_sobj.rds"))
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 24.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 grid stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] org.Hs.eg.db_3.10.0 AnnotationDbi_1.48.0 IRanges_2.20.2
## [4] S4Vectors_0.24.4 Biobase_2.46.0 BiocGenerics_0.32.0
## [7] ComplexHeatmap_2.14.0 ggplot2_3.3.5 patchwork_1.1.2
## [10] dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] softImpute_1.4 graphlayouts_0.7.0
## [3] pbapply_1.4-2 lattice_0.20-41
## [5] haven_2.3.1 vctrs_0.3.8
## [7] usethis_2.0.1 dynwrap_1.2.1
## [9] blob_1.2.1 survival_3.2-13
## [11] prodlim_2019.11.13 dynutils_1.0.5
## [13] later_1.3.0 DBI_1.1.1
## [15] R.utils_2.11.0 SingleCellExperiment_1.8.0
## [17] rappdirs_0.3.3 uwot_0.1.8
## [19] dqrng_0.2.1 jpeg_0.1-8.1
## [21] zlibbioc_1.32.0 pspline_1.0-18
## [23] pcaMethods_1.78.0 mvtnorm_1.1-1
## [25] htmlwidgets_1.5.4 GlobalOptions_0.1.2
## [27] future_1.22.1 UpSetR_1.4.0
## [29] laeken_0.5.2 leiden_0.3.3
## [31] clustree_0.4.3 scater_1.14.6
## [33] irlba_2.3.3 DEoptimR_1.0-9
## [35] tidygraph_1.1.2 Rcpp_1.0.9
## [37] readr_2.0.2 KernSmooth_2.23-17
## [39] carrier_0.1.0 promises_1.1.0
## [41] gdata_2.18.0 DelayedArray_0.12.3
## [43] limma_3.42.2 graph_1.64.0
## [45] RcppParallel_5.1.9 Hmisc_4.4-0
## [47] fs_1.5.2 RSpectra_0.16-0
## [49] fastmatch_1.1-0 ranger_0.12.1
## [51] digest_0.6.25 png_0.1-7
## [53] sctransform_0.2.1 cowplot_1.0.0
## [55] DOSE_3.12.0 here_1.0.1
## [57] TInGa_0.0.0.9000 ggraph_2.0.3
## [59] pkgconfig_2.0.3 GO.db_3.10.0
## [61] DelayedMatrixStats_1.8.0 gower_0.2.1
## [63] ggbeeswarm_0.6.0 iterators_1.0.12
## [65] DropletUtils_1.6.1 reticulate_1.26
## [67] clusterProfiler_3.14.3 SummarizedExperiment_1.16.1
## [69] circlize_0.4.15 beeswarm_0.4.0
## [71] GetoptLong_1.0.5 xfun_0.35
## [73] bslib_0.3.1 zoo_1.8-10
## [75] tidyselect_1.1.0 reshape2_1.4.4
## [77] purrr_0.3.4 ica_1.0-2
## [79] pcaPP_1.9-73 viridisLite_0.3.0
## [81] rtracklayer_1.46.0 rlang_1.0.2
## [83] hexbin_1.28.1 jquerylib_0.1.4
## [85] dyneval_0.9.9 glue_1.4.2
## [87] RColorBrewer_1.1-2 matrixStats_0.56.0
## [89] stringr_1.4.0 lava_1.6.7
## [91] europepmc_0.3 DESeq2_1.26.0
## [93] recipes_0.1.17 labeling_0.3
## [95] harmony_0.1.0 httpuv_1.5.2
## [97] class_7.3-17 BiocNeighbors_1.4.2
## [99] DO.db_2.9 annotate_1.64.0
## [101] jsonlite_1.7.2 XVector_0.26.0
## [103] bit_4.0.4 mime_0.9
## [105] aquarius_0.1.5 Rsamtools_2.2.3
## [107] gridExtra_2.3 gplots_3.0.3
## [109] stringi_1.4.6 processx_3.5.2
## [111] gsl_2.1-6 bitops_1.0-6
## [113] cli_3.0.1 batchelor_1.2.4
## [115] RSQLite_2.2.0 randomForest_4.6-14
## [117] tidyr_1.1.4 data.table_1.14.2
## [119] rstudioapi_0.13 org.Mm.eg.db_3.10.0
## [121] GenomicAlignments_1.22.1 nlme_3.1-147
## [123] qvalue_2.18.0 scran_1.14.6
## [125] locfit_1.5-9.4 scDblFinder_1.1.8
## [127] listenv_0.8.0 ggthemes_4.2.4
## [129] gridGraphics_0.5-0 R.oo_1.24.0
## [131] dbplyr_1.4.4 TTR_0.24.2
## [133] readxl_1.3.1 lifecycle_1.0.1
## [135] timeDate_3043.102 ggpattern_0.3.1
## [137] munsell_0.5.0 cellranger_1.1.0
## [139] R.methodsS3_1.8.1 proxyC_0.1.5
## [141] visNetwork_2.0.9 caTools_1.18.0
## [143] codetools_0.2-16 GenomeInfoDb_1.22.1
## [145] vipor_0.4.5 lmtest_0.9-38
## [147] msigdbr_7.5.1 htmlTable_1.13.3
## [149] triebeard_0.3.0 lsei_1.3-0
## [151] xtable_1.8-4 ROCR_1.0-7
## [153] BiocManager_1.30.10 scatterplot3d_0.3-41
## [155] abind_1.4-5 farver_2.0.3
## [157] parallelly_1.28.1 RANN_2.6.1
## [159] askpass_1.1 GenomicRanges_1.38.0
## [161] RcppAnnoy_0.0.16 tibble_3.1.5
## [163] ggdendro_0.1-20 cluster_2.1.0
## [165] future.apply_1.5.0 Seurat_3.1.5
## [167] dendextend_1.15.1 Matrix_1.3-2
## [169] ellipsis_0.3.2 prettyunits_1.1.1
## [171] lubridate_1.7.9 ggridges_0.5.2
## [173] igraph_1.2.5 RcppEigen_0.3.3.7.0
## [175] fgsea_1.12.0 remotes_2.4.2
## [177] scBFA_1.0.0 destiny_3.0.1
## [179] VIM_6.1.1 testthat_3.1.0
## [181] htmltools_0.5.2 BiocFileCache_1.10.2
## [183] yaml_2.2.1 utf8_1.1.4
## [185] plotly_4.9.2.1 XML_3.99-0.3
## [187] ModelMetrics_1.2.2.2 e1071_1.7-3
## [189] foreign_0.8-76 withr_2.5.0
## [191] fitdistrplus_1.0-14 BiocParallel_1.20.1
## [193] xgboost_1.4.1.1 bit64_4.0.5
## [195] foreach_1.5.0 robustbase_0.93-9
## [197] Biostrings_2.54.0 GOSemSim_2.13.1
## [199] rsvd_1.0.3 memoise_2.0.0
## [201] evaluate_0.18 forcats_0.5.0
## [203] rio_0.5.16 geneplotter_1.64.0
## [205] tzdb_0.1.2 caret_6.0-86
## [207] ps_1.6.0 DiagrammeR_1.0.6.1
## [209] curl_4.3 fdrtool_1.2.15
## [211] fansi_0.4.1 highr_0.8
## [213] urltools_1.7.3 xts_0.12.1
## [215] GSEABase_1.48.0 acepack_1.4.1
## [217] edgeR_3.28.1 checkmate_2.0.0
## [219] scds_1.2.0 cachem_1.0.6
## [221] npsurv_0.4-0 babelgene_22.3
## [223] rjson_0.2.20 openxlsx_4.1.5
## [225] ggrepel_0.9.1 clue_0.3-60
## [227] rprojroot_2.0.2 stabledist_0.7-1
## [229] tools_3.6.3 sass_0.4.0
## [231] nichenetr_1.1.1 magrittr_2.0.1
## [233] RCurl_1.98-1.2 proxy_0.4-24
## [235] car_3.0-11 ape_5.3
## [237] ggplotify_0.0.5 xml2_1.3.2
## [239] httr_1.4.2 assertthat_0.2.1
## [241] rmarkdown_2.18 boot_1.3-25
## [243] globals_0.14.0 R6_2.4.1
## [245] Rhdf5lib_1.8.0 nnet_7.3-14
## [247] RcppHNSW_0.2.0 progress_1.2.2
## [249] genefilter_1.68.0 statmod_1.4.34
## [251] gtools_3.8.2 shape_1.4.6
## [253] HDF5Array_1.14.4 BiocSingular_1.2.2
## [255] rhdf5_2.30.1 splines_3.6.3
## [257] AUCell_1.8.0 carData_3.0-4
## [259] colorspace_1.4-1 generics_0.1.0
## [261] base64enc_0.1-3 dynfeature_1.0.0
## [263] smoother_1.1 gridtext_0.1.1
## [265] pillar_1.6.3 tweenr_1.0.1
## [267] sp_1.4-1 ggplot.multistats_1.0.0
## [269] rvcheck_0.1.8 GenomeInfoDbData_1.2.2
## [271] plyr_1.8.6 gtable_0.3.0
## [273] zip_2.2.0 knitr_1.41
## [275] latticeExtra_0.6-29 biomaRt_2.42.1
## [277] fastmap_1.1.0 ADGofTest_0.3
## [279] copula_1.0-0 doParallel_1.0.15
## [281] vcd_1.4-8 babelwhale_1.0.1
## [283] openssl_1.4.1 scales_1.1.1
## [285] backports_1.2.1 ipred_0.9-12
## [287] enrichplot_1.6.1 hms_1.1.1
## [289] ggforce_0.3.1 Rtsne_0.15
## [291] shiny_1.7.1 numDeriv_2016.8-1.1
## [293] polyclip_1.10-0 lazyeval_0.2.2
## [295] Formula_1.2-3 tsne_0.1-3
## [297] crayon_1.3.4 MASS_7.3-54
## [299] pROC_1.16.2 viridis_0.5.1
## [301] dynparam_1.0.0 rpart_4.1-15
## [303] zinbwave_1.8.0 compiler_3.6.3
## [305] ggtext_0.1.0